iOS Mobile App Analysis

Team 4 B for BA780

Team members: Yuhong Lu, Xiaohan Mei, Ziyan Pei, Peng Yuan, Mengqing Zhang, Jiayuan Zou

library(tidyverse)
Registered S3 method overwritten by 'dplyr':
  method           from
  print.rowwise_df     
-- Attaching packages --------------------------------------- tidyverse 1.2.1 --
v ggplot2 3.2.1     v purrr   0.3.2
v tibble  2.1.3     v dplyr   0.8.3
v tidyr   0.8.3     v stringr 1.4.0
v readr   1.3.1     v forcats 0.4.0
-- Conflicts ------------------------------------------ tidyverse_conflicts() --
x dplyr::filter() masks stats::filter()
x dplyr::lag()    masks stats::lag()
library(readr)
library(magrittr)

Attaching package: 'magrittr'

The following object is masked from 'package:purrr':

    set_names

The following object is masked from 'package:tidyr':

    extract
library(ggplot2)
Data Cleaning & Merge
# Read the App Store listing table from AppleStore.csv (path is relative to
# the working directory).
AppleStore <- read_csv(file = "AppleStore.csv")
Missing column names filled in: 'X1' [1]
Parsed with column specification:
cols(
  X1 = col_double(),
  id = col_double(),
  track_name = col_character(),
  size_bytes = col_double(),
  currency = col_character(),
  price = col_double(),
  rating_count_tot = col_double(),
  rating_count_ver = col_double(),
  user_rating = col_double(),
  user_rating_ver = col_double(),
  ver = col_character(),
  cont_rating = col_character(),
  prime_genre = col_character(),
  sup_devices.num = col_double(),
  ipadSc_urls.num = col_double(),
  lang.num = col_double(),
  vpp_lic = col_double()
)
# Read the app description table from appleStore_description.csv (path is
# relative to the working directory).
descript <- read_csv(file = "appleStore_description.csv")
Parsed with column specification:
cols(
  id = col_double(),
  track_name = col_character(),
  size_bytes = col_double(),
  app_desc = col_character()
)
# Preview both raw tables.
AppleStore
descript

# App Store table: convert the currency column to a factor.
AppleStore$currency <- as.factor(AppleStore$currency)

# Search for a usable primary key: for each table, list every `id` and every
# `track_name` value that appears more than once. An empty result means the
# column is unique in that table.
AppleStore %>%
  group_by(id) %>%
  summarize(count = n()) %>%
  filter(count > 1)
AppleStore %>%
  group_by(track_name) %>%
  summarize(count = n()) %>%
  filter(count > 1)
descript %>%
  group_by(id) %>%
  summarize(count = n()) %>%
  filter(count > 1)
descript %>%
  group_by(track_name) %>%
  summarize(count = n()) %>%
  filter(count > 1)

# `id` is used as the key. Merge the two datasets on it, drop the auto-named
# X1 column and the columns duplicated by the merge, and keep the description
# table's size column under the name `size_byte`.
AP <- AppleStore %>%
  merge(descript, by = "id") %>%
  select(-X1, -size_bytes.x, -track_name.y) %>%
  rename(size_byte = size_bytes.y)

# Remove every row containing at least one NA, then confirm none remain.
AP_omit <- na.omit(AP)
any(is.na(AP_omit))
[1] FALSE
# Save the NA-free merged table to AP_omit.csv, then display it.
# NOTE(review): write.csv() defaults to row.names = TRUE, so the file gets an
# unnamed leading row-name column; confirm that is wanted by downstream readers.
write.csv(AP_omit, file = "AP_omit.csv")
AP_omit
NA
LS0tDQp0aXRsZTogIklPUyBNb2JpbGUgQXBwIEFuYWx5c2lzIg0Kb3V0cHV0OiBodG1sX25vdGVib29rDQotLS0NCg0KIyMjIElPUyBNb2JpbGUgQXBwIEFuYWx5c2lzDQojIyMjIFRlYW0gNCBCIGZvciBCQTc4MA0KVGVhbSBtZW1iZXJzOg0KWXVob25nIEx1LCBYaWFvaGFuIE1laSwgWml5YW4gUGVpLCBQZW5nIFl1YW4sIE1lbmdxaW5nIFpoYW5nLCBKaWF5dWFuIFpvdQ0KDQpgYGB7ciBlY2hvPVRSVUV9DQpsaWJyYXJ5KHRpZHl2ZXJzZSkNCmxpYnJhcnkocmVhZHIpDQpsaWJyYXJ5KG1hZ3JpdHRyKQ0KbGlicmFyeShnZ3Bsb3QyKQ0KYGBgDQoNCiMjIyMjIERhdGEgQ2xlYW5pbmcgJiBNZXJnZQ0KYGBge3IgZWNobz1UUlVFfQ0KQXBwbGVTdG9yZSA8LSByZWFkX2NzdigiQXBwbGVTdG9yZS5jc3YiKQ0KZGVzY3JpcHQgPC0gcmVhZF9jc3YoImFwcGxlU3RvcmVfZGVzY3JpcHRpb24uY3N2IikNCkFwcGxlU3RvcmUNCmRlc2NyaXB0DQojIEFwcGxlIFN0b3JlDQpBcHBsZVN0b3JlJGN1cnJlbmN5IDwtIGFzLmZhY3RvcihBcHBsZVN0b3JlJGN1cnJlbmN5KQ0KIyBHZXQgcHJpbWFyeSBrZXkgLSBJRA0KQXBwbGVTdG9yZSAlPiUgc2VsZWN0KGlkKSAlPiUgZ3JvdXBfYnkoaWQpICU+JSBzdW1tYXJpemUoY291bnQ9bigpKSAlPiUgZmlsdGVyKGNvdW50PjEpDQpBcHBsZVN0b3JlICU+JSBzZWxlY3QodHJhY2tfbmFtZSkgJT4lIGdyb3VwX2J5KHRyYWNrX25hbWUpICU+JSBzdW1tYXJpemUoY291bnQ9bigpKSAlPiUgZmlsdGVyKGNvdW50PjEpDQpkZXNjcmlwdCAlPiUgc2VsZWN0KGlkKSAlPiUgZ3JvdXBfYnkoaWQpICU+JSBzdW1tYXJpemUoY291bnQ9bigpKSAlPiUgZmlsdGVyKGNvdW50PjEpDQpkZXNjcmlwdCAlPiUgc2VsZWN0KHRyYWNrX25hbWUpICU+JSBncm91cF9ieSh0cmFja19uYW1lKSAlPiUgc3VtbWFyaXplKGNvdW50PW4oKSkgJT4lIGZpbHRlcihjb3VudD4xKQ0KIyB0aGUgcHJpbWFyeSBrZXkgaXMgSUQNCiMgTWVyZ2UgdHdvIGRhdGFzZXQNCkFwcGxlU3RvcmUgJT4lIG1lcmdlKGRlc2NyaXB0LCBieT0naWQnKSAlPiUgDQogIHNlbGVjdChldmVyeXRoaW5nKCksLVgxLC1zaXplX2J5dGVzLngsLXRyYWNrX25hbWUueSkgJT4lIHJlbmFtZShzaXplX2J5dGU9c2l6ZV9ieXRlcy55KSAtPkFQDQpBUF9vbWl0IDwtIG5hLm9taXQoQVApDQphbnkoaXMubmEoQVBfb21pdCkpDQp3cml0ZS5jc3YoQVBfb21pdCwnQVBfb21pdC5jc3YnKQ0KQVBfb21pdA0KDQpgYGANCg0K